%matplotlib inline
from math import pi
import pandas
import collections
import matplotlib.pyplot as plt
import numpy as np
import datetime
import matplotlib.dates as mdates
from bokeh.plotting import figure, output_notebook, show
from bokeh import *
from bokeh.io import reset_output,output_file
from bokeh.models import CustomJS, DatetimeTickFormatter
from bokeh.models import FactorRange,ColumnDataSource
from bokeh.models.widgets import Dropdown,Select
from bokeh.layouts import column,row, widgetbox
import holoviews as hv
from bokeh.core.properties import value
from bokeh.palettes import Category20c
from bokeh.transform import cumsum
from bokeh.models.widgets import Panel, Tabs
# Notebook-wide setup: reset Bokeh's output configuration, pick the HoloViews
# backend, and load the table used by the first plot.
reset_output()
# Use Bokeh as the plotting backend for HoloViews.
hv.extension('bokeh')
# Inline notebook output is left disabled here; the plotting code further down
# calls output_notebook() itself right before drawing.
#output_notebook()
# The first plot below reads the Year, Month, Day and PercCancelled columns
# from this table.
data=pandas.read_csv("query1.csv")
def loadData(path):
    """Read the CSV at ``path`` with pandas and return the resulting DataFrame."""
    return pandas.read_csv(path)
the percentage of canceled flights per day, throughout the entire data set
# Plot 1: line chart of the PercCancelled column against calendar date.
reset_output()
output_notebook()

# One datetime.date per row, assembled from the Year / Month / Day columns.
histD = [
    datetime.date(yr, mo, dy)
    for yr, mo, dy in zip(data['Year'], data['Month'], data['Day'])
]
percentage = data['PercCancelled']

p = figure(
    title="Percentage cancelled flights per day",
    x_axis_type="datetime",
    plot_width=800,
    plot_height=400,
)
p.line(histD, percentage)

# Show the same "day month-name year" label at every zoom level.
p.xaxis.formatter = DatetimeTickFormatter(
    **{scale: ["%d %B %Y"] for scale in ("hours", "days", "months", "years")}
)

t = show(row(p), notebook_handle=True)
reset_output()
weekly percentages of delays that are due to weather, throughout the entire data set
# Plot 2: bar chart of PercDelaysPerWeek for every (year, week) pair.
data = loadData("query2.csv")
output_notebook()
N = 2003

# Split each "year-week" label; the week becomes an int so that the sort
# below orders weeks numerically (2 before 10) within a year.
weekly_records = []
for record in data.itertuples():
    year_part, week_part = record.YearWeek.split('-')
    weekly_records.append((year_part, int(week_part), record.PercDelaysPerWeek))
ymOrdDict = sorted(weekly_records, key=lambda rec: (rec[0], rec[1]))

# Bokeh's nested categorical axis takes (year, week) tuples of strings.
ymDict = [(year_part, str(week_no)) for year_part, week_no, _ in ymOrdDict]
# The stored value is scaled by 100 before it is shown as a percentage.
percDict = [share * 100 for _, _, share in ymOrdDict]

source = ColumnDataSource(data=dict(x=ymDict, counts=percDict))

p1 = figure(
    x_range=FactorRange(*ymDict),
    plot_width=3500,
    plot_height=400,
    title="Weekly delay of flights due to weather",
    tools="hover",
    tooltips="year-week: @x; perc: @counts % ",
)
p1.vbar(x='x', top='counts', width=0.5, alpha=0.5, source=source)

# Axis cosmetics: bars start at zero, week labels are tilted, and the
# vertical grid lines are hidden.
p1.y_range.start = 0
p1.x_range.range_padding = 0.1
p1.yaxis.axis_label = 'Weekly Percentage'
p1.xaxis.major_label_orientation = pi/3
p1.xgrid.grid_line_color = None

k = show(row(p1), notebook_handle=True)
reset_output()
The percentage of flights belonging to a given "distance group" that were able to halve their departure delays by the time they arrived at their destinations. Distance groups sort flights by their total distance in miles. Flights with distances that are less than 200 miles belong in group 1, flights with distances that are between 200 and 399 miles belong in group 2, flights with distances that are between 400 and 599 miles belong in group 3, and so on. The last group contains flights whose distances are between 2400 and 2599 miles.
# Plot 3: stacked bars per distance group, splitting each group's flights
# into "halved" and "notHalved" counts.
datac = loadData("query3.csv")
reset_output()
output_notebook()

# Stack order and matching colours; the strings are also the column names
# looked up in the data dict below.
years = ["halved", "notHalved"]
colors = ["#c9d9d3", "#718dbf"]

# One (group, halved, notHalved, total, PercHalved) record per CSV row.
groupList = []
for record in datac.itertuples():
    halved = int(record.HalvedPerDistGroup)
    total = int(record.FlightsPerDistGroup)
    groupList.append(
        (int(record.DistGroup), halved, total - halved, total, record.PercHalved)
    )

# Numeric sort so that group 10 follows group 9 instead of group 1.
ordgroupList = sorted(groupList, key=lambda rec: rec[0])

# The axis labels are taken from the data instead of a fixed '1'..'13' list.
# A fixed list only lines up with the bar columns when the CSV contains
# exactly those thirteen groups; with any other set the columns end up with
# different lengths or under the wrong label.
groups = [str(rec[0]) for rec in ordgroupList]
halvedList = [rec[1] for rec in ordgroupList]
nonHalvedList = [rec[2] for rec in ordgroupList]
# PercHalved is scaled by 100 for the tooltip (presumably it is stored as a
# fraction - confirm against the query that writes query3.csv).
percList = [rec[4] * 100 for rec in ordgroupList]
totalList = [rec[3] for rec in ordgroupList]

data = {'groups' : groups,
        'halved' : halvedList,
        'notHalved' : nonHalvedList,
        'perc': percList,
        'total': totalList}

p2 = figure(x_range=groups, plot_width=650, plot_height=550,
            title="Halved Flight Graph", tools="hover",
            tooltips="$name: @$name; "
                     "total: @total; "
                     "perc of halved: @perc%")
p2.vbar_stack(years, x='groups', width=0.9, color=colors, source=data,
              legend=[value(x) for x in years])
p2.xaxis.axis_label = 'Distance Group'
p2.yaxis.axis_label = 'Total amount of flights'

k = show(p2, notebook_handle=True)
A weekly "penalty" score for each airport that depends on both its incoming and outgoing flights. The score adds 0.5 for each incoming flight that is more than 15 minutes late, and 1 for each outgoing flight that is more than 15 minutes late.
%%opts Overlay [width=700 height=700 title_format="Distribution of flights for each distance group" xaxis=None yaxis=None]
%%opts Bars {+framewise}
%%opts Bars [width=600 height=2500 show_legend=False tools=['hover']]
%output max_frames=1000
%%opts Bars [invert_axes=True]
# Plot 4: HoloViews bar chart of the Penalty column per airport, with week
# and year left as the remaining key dimensions.
hv.extension('bokeh')

datac = pandas.read_csv("query4.csv")

ymOrdDict = []
menu = []  # distinct airports in order of first appearance
keys = []  # not used in this cell; kept in case other cells reference it
yw = ""    # not used in this cell; kept in case other cells reference it

# Exact-match set membership replaces the earlier substring scan over `menu`,
# which cost O(airports) per row and would have skipped any airport code
# contained inside a previously seen one.
seen_airports = set()
for record in datac.itertuples():
    year_part, week_part = record.YearWeek.split('-')
    airport = record.Airport
    ymOrdDict.append((
        int(year_part),
        int(week_part),
        airport,
        record.IncomingDelaysPerAirport,
        record.OutgoingDelaysPerAirport,
        int(record.Penalty),  # any fractional part of the penalty is dropped
    ))
    if airport not in seen_airports:
        seen_airports.add(airport)
        menu.append(airport)

# Chronological order: by year, then by week number.
ymOrdDict = sorted(ymOrdDict, key=lambda rec: (rec[0], rec[1]))
df = pandas.DataFrame.from_records(
    ymOrdDict,
    columns=['year', 'week', 'airport', 'incoming', 'outgoing', 'penalty'],
)

# (column name, display label) pairs for the HoloViews dimensions.
kdims = [('airport', 'Airport'), ('week', 'Week'), ('year', 'Year')]
vdims = [('penalty', 'Penalty')]
macro = hv.Dataset(df, kdims, vdims, label='Weekly penalty per Airport, given:')
curve = macro.to(hv.Bars, 'Airport', 'Penalty')

# A bare expression at the end of the cell makes the notebook render the plot.
curve